/*===================================================================
assessments.do
	*Last Updated:	2021-08-13
	*Author(s):		David Phillips, Sean McConville, Grace Ortuzar, Charlie Law
	*Purpose:		Combines all baseline data on homelessness prevention
					applicants in SCC. Merges data from HMIS assessments,
					HMIS demographic files, and the RCT intake. Limits to
					sample of interest
===================================================================*/

	
	
*RCT intake data
*	Joins the crosswalk to the deident intake extract on leoid, cleans HMISID,
*	parses the submission date, and keeps submissions from 01jul2019 through 31dec2020.
*	The result is stored in the tempfile rct for the later merge on HMISID.
	cd "${mydir}"
	clear
	import delimited using "scc_hp_part_dh_06212021_deident.csv", clear
	tempfile temp
	save `temp'
	cd "${datadir}"
	import delimited  using "scc_hp_part_dh_06212021_crosswalk.csv", clear
	merge 1:1 leoid using `temp'
	drop leoid _merge
	rename hmis1_text HMISID
	tab eligible_other_funds, m 
	tab treatment_sum, m 
	tab treatment_sum if eligible_other_funds==1 

	*standardize the ID: upper-case it, then turn every letter O into the digit 0
	*(a count of . in subinstr replaces all occurrences, however many there are)
	replace HMISID=upper(HMISID)
	replace HMISID=subinstr(HMISID,"O","0",.)
	gen length=length(HMISID) /*all 9 digits*/ 
	
	*submissiondate is split on spaces; the first piece is kept as the date text
	split submissiondate
	drop submissiondate2
	rename submissiondate1 date
	
	*a fixed-width substr(submissiondate,1,8) was abandoned because the date text varies in length
	*brow submissiondate date
	
	gen date2 = date(date, "MDY")
	format date2 %td
	*keep 01jul2019 through 31dec2020; a missing date2 compares above every date,
	*so the second drop also removes rows whose date could not be parsed
	drop if date2<td(01jul2019) 	
	drop if date2>td(31dec2020)
	rename date2 rct_dt
	
	tempfile rct
	save `rct' 
	
*Process HMIS Assessments
	cd "$mydir"

	*stage the PRVISPDAT sheet (all prevention assessments) in a tempfile
	tempfile temp
	import excel "$datadir/LEO PR-VISPDAT 5.3.2021.xlsx", sheet("PRVISPDAT") firstrow clear
	save `temp', replace

	*load the Demographics sheet and attach the assessments to it;
	*clients found only in Demographics (no assessment row) are not kept
	import excel "$datadir/LEO PR-VISPDAT 5.3.2021.xlsx", sheet("Demographics") firstrow clear
	merge 1:m ClientsUniqueIdentifier using `temp', keep(match using) nogenerate


*merge assessments with rct intake data
	*the client identifier is renamed so it matches the key used in the rct tempfile
	rename ClientsUniqueIdentifier HMISID
	merge m:1 HMISID using `rct', nogenerate

	*focus on first assessment for a client; or if rct participant, most recent one to intake 
	*	Reduces the data to one row per HMISID. rct participants are the rows with a
	*	non-missing rct_dt; everyone else has rct_dt missing.
		*collapse rows that repeat the same client, assessment date and score
		duplicates drop HMISID ClientAssessmentsAssessmentDa ClientAssessmentsAssessmentSc, force
	
		*if rct participant, take the one that's before the rct intake but closest to it; if none, cut them
		*a missing assessment date compares above any rct_dt, so participants without an
		*assessment are removed by this drop as well
		drop if ClientAssessmentsAssessmentDa > rct_dt & rct_dt ~= .
		*latest assessment first within client, then keep row 1 for participants
		*NOTE(review): relies on bysort leaving the within-client order from gsort intact - confirm
		*NOTE(review): two assessments on the same date with different scores are ordered arbitrarily
		gsort HMISID - ClientAssessmentsAssessmentDa
		bysort HMISID: gen n=_n 		
		drop if n>1 & rct_dt ~= .
		drop n

		*if not rct participant, take the earliest one
		*earliest assessment first within client, then keep row 1 for non-participants
		sort HMISID ClientAssessmentsAssessmentDa
		bysort HMISID: gen n=_n 
		drop if n>1 & rct_dt == .
		drop n
		count /*9,815*/
	
	*the retained assessment date and score get explicit first_ names
	rename ClientAssessmentsAssessmentDa first_assessment_dt
	rename ClientAssessmentsAssessmentSc first_assmt_score /*important to distinguish it from the ClientAssessmentSc of the enrollment file*/ 
	sort first_assessment_dt


	*Create month index 
	*	month_a and quarter_a count calendar months and quarters of the first
	*	assessment, with July 2018 as month 1 and Jul-Sep 2018 as quarter 1.
	gen mm_a=month(first_assessment_dt)
	gen q_a=quarter(first_assessment_dt)
	gen year_a=year(first_assessment_dt)
	order mm_a year_a, after (first_assessment_dt)
	*mm_a is spelled out in full so the formula does not depend on variable abbreviation
	gen month_a=12*(year_a-2018)+mm_a-6 /*month 1 is july 2018*/ 
	gen quarter_a=4*(year_a-2018)+q_a-2
	order month_a, after(mm_a)
	sort first_assessment_dt

	sort quarter_a
	tab month_a,m 
	label var month_a "Month of First Assessment" 
	*one label per month index; each value may appear only once, and under the
	*formula above Apr, May and Jun 2021 are months 34, 35 and 36
	label define months ///
		1 "July 2018" 2 "Aug 2018" 3 "Sept 2018" 4 "Oct 2018" 5 "Nov 2018" 6 "Dec 2018" ///
		7 "Jan 2019" 8 "Feb 2019" 9 "Mar 2019" 10 "Apr 2019" 11 "May 2019" 12 "Jun 2019" ///
		13 "July 2019" 14 "Aug 2019" 15 "Sept 2019" 16 "Oct 2019" 17 "Nov 2019" 18 "Dec 2019" ///
		19 "Jan 2020" 20 "Feb 2020" 21 "Mar 2020" 22 "Apr 2020" 23 "May 2020" 24 "Jun 2020" ///
		25 "July 2020" 26 "Aug 2020" 27 "Sept 2020" 28 "Oct 2020" 29 "Nov 2020" 30 "Dec 2020" ///
		31 "Jan 2021" 32 "Feb 2021" 33 "Mar 2021" 34 "Apr 2021" 35 "May 2021" 36 "Jun 2021"
	label values month_a months 
	
	*create semesters
	*	two consecutive quarters per semester: quarters 1-2 are semester 1, up to
	*	quarters 9-10 as semester 5; any quarter outside 1-10 is left missing
	*	NOTE(review): quarters 11 and up (2021) therefore get no semester - confirm intended
	gen semester_a=ceil(quarter_a/2) if inrange(quarter_a,1,10)
	tab semester_a 
	
	
	*define the focal date = study date if there is one; otherwise, first assessment date
	gen startdate = cond(rct_dt ~= ., rct_dt, first_assessment_dt)


	*need to rename the variables from the pr-vispdat assessment 
	*	Each rename below pairs the names in the first parenthesis, in order, with the
	*	names in the second. Single and double letter names (J, K, P, ...) are the
	*	names those columns carry after the import.
	gen assmtscore = first_assmt_score

	rename (ClientAssessmentsAssessmentNa ClientAssessmentCustomAreyou ///
	        ClientAssessmentCustomHaveyo  ClientAssessmentCustomIsyour ///
	        ClientAssessmentCustomIdono   ClientAssessmentCustomDoyou ///
	        J K ///
	        ClientAssessmentCustomIsanyo  ClientAssessmentCustomIffema) ///
	       (assmtname harmrisk ///
	        violence  famabuse ///
	        trauma    legal ///
	        risky inflicharm ///
	        beenforced pregnant)

	rename (ClientAssessmentCustomAtany   ClientAssessmentCustomIFYES ///
	        P ///
	        ClientAssessmentCustomPRVIS   ClientAssessmentCustomWithin ///
	        ClientAssessmentCustomAccessi ClientAssessmentCustomApoor ///
	        ClientAssessmentCustomRestric ClientAssessmentCustomNorefe ///
	        ClientAssessmentCustomDifficu X ///
	        ClientAssessmentCustomSafety  Z ///
	        ClientAssessmentCustomIfyour) ///
	       (unfithous num_unfith ///
	        tot_unfith ///
	        Q11 neighbissu ///
	        disabled poorcredit ///
	        houselimit badrefer ///
	        engtrouble mathtrouble ///
	        safeconcern overcrowd ///
	        wouldremain)

	rename (ClientAssessmentCustomIsther ClientAssessmentCustomWhatis ///
	        AD AE AF AG AH ///
	        ClientAssessmentCustomInthe ///
	        AJ AK AL) ///
	       (owemoney debt ///
	        moneyflow nextpayday paydayamount currentbalanc fin_help ///
	        unpaidloan ///
	        Q21 fulfilled basicneed)

	rename (ClientAssessmentCustomReceive ClientAssessmentCustomTakena ///
	        ClientAssessmentCustomBeenho  ClientAssessmentCustomUseda ///
	        ClientAssessmentCustomTalked  ClientAssessmentCustomStayed ///
	        AS AT AU) ///
	       (healthER ambulanc ///
	        inpatient crisisserv ///
	        police prison ///
	        physhea chronichea physlimit)

	rename (ClientAssessmentCustomWhenyo ClientAssessmentCustomHasyou ///
	        ClientAssessmentCustomDoesdr ClientAssessmentCustomAmenta ///
	        ClientAssessmentCustomApast  ClientAssessmentCustomAlearn ///
	        BB ///
	        ClientAssessmentCustomArethe ///
	        BD) ///
	       (avoidhelp drugevict ///
	        drughous mental_hea ///
	        headinj learndis ///
	        brainissues ///
	        prescribed ///
	        abusemeds)

	rename (BE BF ///
	        ClientAssessmentCustomDoyour ClientAssessmentCustomHaveth ///
	        BI BJ ///
	        ClientAssessmentCustom3ormo  ClientAssessmentCustom2ormo) ///
	       (childr_gone famlegal ///
	        childschoo famchange ///
	        famtime sibtutor ///
	        thirt_tut twelv_tut)

	*client demographics
	rename (ClientsRace ClientsEthnicity ClientsCurrentAge ClientsGender) ///
	       (race        ethni            age               gend)
	
	*processing assessment variables
		*yes/no items: each variable listed in the global gets a numeric companion
		*named with a trailing Y, equal to 1 for Yes, 0 for No, and missing otherwise
		global yes_no_strings harmrisk violence famabuse trauma ///
		 legal risky inflicharm beenforced pregnant ///
		 unfithous Q11 disabled poorcredit houselimit ///
		 badrefer engtrouble mathtrouble safeconcern ///
		 overcrowd wouldremain owemoney moneyflow fin_help ///
		 Q21 fulfilled basicneed physhea ///
		 chronichea physlimit avoidhelp ///
		 drugevict drughous mental_hea headinj ///
		 learndis brainissues prescribed ///
		 abusemeds childr_gone famlegal ///
		 childschoo famchange famtime ///
		 sibtutor thirt_tut twelv_tut

		foreach item of global yes_no_strings {
			gen `item'Y = cond(`item'=="Yes", 1, cond(`item'=="No", 0, .))
		}

		*tiered count items: companion named with a trailing N holds the count;
		*the text 3 or more maps to 3 and 4 or more maps to 4, anything else is missing
		foreach item of varlist num_unfith neighbissu unpaidloan ///
		healthER inpatient crisisserv police prison {
			gen `item'N = .
			forvalues k = 0/3 {
				replace `item'N = `k' if `item'=="`k'"
			}
			replace `item'N = 3 if `item'=="3 or more"
			replace `item'N = 4 if `item'=="4 or more"
		}

		*duration bands of tot_unfith converted to a single number of days per band
		*(unrecognized text stays missing)
		gen tot_unfithN = ///
			cond(tot_unfith=="less than a week",   3.5, ///
			cond(tot_unfith=="1 week - 3 months",  (365/12*3 + 7)/2, ///
			cond(tot_unfith=="3 - 6 months",       4.5*365/12, ///
			cond(tot_unfith=="6 months to 1 year", 9*365/12, ///
			cond(tot_unfith=="1-2 years",          365*1.5, ///
			cond(tot_unfith=="2 years or more",    365*3, .))))))

			
		*assign numeric values to client demographics
		*	ethni_d is 1 for Hispanic/Latino, 0 for Non-Hispanic/Non-Latino, missing otherwise
		gen ethni_d=.
		replace ethni_d=1 if ethni=="Hispanic/Latino"
		replace ethni_d=0 if ethni=="Non-Hispanic/Non-Latino"

		*create dummy variables for race and gend categories
		*	Each dummy is 1 for its own category, 0 for any other recognized category,
		*	and missing when the text is not one of the recognized categories.
			*white is reference category for race
		tempvar race_ok
		gen byte `race_ok' = inlist(race, "White", "Black or African American", "Asian", ///
			"American Indian or Alaska Native", "Multi-Racial", ///
			"Native Hawaiian or Other Pacific Islander")
		gen black        = (race=="Black or African American")                 if `race_ok'
		gen asian        = (race=="Asian")                                     if `race_ok'
		gen amer_indian  = (race=="American Indian or Alaska Native")          if `race_ok'
		gen multi_racial = (race=="Multi-Racial")                              if `race_ok'
		gen pac_islander = (race=="Native Hawaiian or Other Pacific Islander") if `race_ok'

			*female is reference category for gend
			*the non-conforming category is matched on its full text beginning with the
			*word Gender; the variable is named gend but the data values are not abbreviated
		tempvar gend_ok
		gen byte `gend_ok' = inlist(gend, "Female", "Male", ///
			"Trans Female (MTF or Male to Female)", ///
			"Trans Male (FTM or Female to Male)", ///
			"Gender Non-Conforming (i.e. not exclusively male or female)")
		gen male         = (gend=="Male")                                 if `gend_ok'
		gen trans_female = (gend=="Trans Female (MTF or Male to Female)") if `gend_ok'
		gen trans_male   = (gend=="Trans Male (FTM or Female to Male)")   if `gend_ok'
		gen nonconform   = (gend=="Gender Non-Conforming (i.e. not exclusively male or female)") if `gend_ok'

	
		*score bands of width 4, top-coded at 16+
		*a missing score compares above every number, so it is excluded explicitly
		*and grp_det stays missing for it
		gen grp_det = floor(first_assmt_score/4)
		replace grp_det = 4 if grp_det >= 4 & !missing(grp_det)
	
		label define group_label 0 "0 to 3" 1 "4 to 7" 2 "8 to 11" 3 "12 to 15" 4 "16+"
		label values grp_det  	group_label

		
		*eligible_hp: 1 when the score meets the cutoff in force on the first assessment
		*date, or when treatment_sum is 1; otherwise 0
		*	before 01feb2018           cutoff 16
		*	01feb2018 to 30jun2019     cutoff 12
		*	01jul2019 onward           cutoff 14
		*missing scores and missing dates never satisfy a cutoff rule; the last period
		*starts on 01jul2019 itself so no assessment date falls between periods
		gen eligible_hp=0 
		replace eligible_hp=1 if first_assessment_dt<td(01feb2018) & first_assmt_score>=16 & !missing(first_assmt_score)
		replace eligible_hp=1 if inrange(first_assessment_dt, td(01feb2018), td(30jun2019)) & first_assmt_score>=12 & !missing(first_assmt_score)
		replace eligible_hp=1 if treatment_sum==1
		replace eligible_hp=1 if first_assessment_dt>=td(01jul2019) & !missing(first_assessment_dt) & first_assmt_score>=14 & !missing(first_assmt_score)


	codebook first_assessment_dt




	*accounting for missing data in Xs
		*for every variable named in the global control_dummy_vars, create
		*  a companion with a trailing m : 1 when the variable equals ., else 0
		*  a companion with a trailing z : copy of the variable with . replaced by 0
		*NOTE(review): control_dummy_vars is not defined in this file - presumably set by the caller
		foreach ctrl of global control_dummy_vars {
			gen `ctrl'm = (`ctrl'==.)
			gen `ctrl'z = cond(`ctrl'==., 0, `ctrl')
		}


	*write the combined baseline file
	save "$datadir/assmt.dta", replace
	
	